/* ------------------------------------------------------------------CPS_taxsim_createtaxunits.do
This file is taken from Jeff Larrimore's original code base and creates tax units as described in:

{CITE}
5/22/13
- Added section to recode time inconsistent variables/differences from Unicon/census
- Changed structure to deal with individual years rather than a full extract

Last updated: 5/22/13

// --------------------------------------------------------------------------------------------
*/

// This stuff is temporary [until we integrate the code/move the data]
// NOTE(review): the working directory is hard-coded to one machine; every file
// name below (CPS_marYYYY, CPS_taxsim_tukey) is resolved relative to it.
cd  "C:\Users\Stuart\Dropbox\InsecurityLit"
* cd "F:\Dropbox\InsecurityLit"

// Rebuild guard: the loop below runs only when the confirm command returns a
// non-zero code.
// NOTE(review): the trailing token "blah" looks like a deliberate way to make
// the check fail so that the key file is always rebuilt - confirm the intent
// before removing it.
cap confirm file CPS_taxsim_tukey.dta blah
if _rc!=0 {
	// One pass per value of y; yof is y plus one and selects the input file
	// CPS_marYYYY that is loaded for this pass.
	// NOTE(review): presumably y is the income year and yof the March survey
	// year - confirm against the data documentation.
	forvalues y=1985/2011 {
		* local y = 1986
		local yof = `y'+1

		// Load this pass's extract, discarding whatever is in memory.
		use CPS_mar`yof', clear
		
		// Harmonise variable names across file vintages. Each old/new pair is
		// attempted in the order listed and silently skipped when the rename
		// fails, so when several candidate source names exist in one file the
		// first one listed claims the target name.
		// (prfamtyp -> a_famtyp was added for a 1995 issue.)
		foreach pair in "h_seq hhseqnum"    ///
		                "ppos pppos"        ///
		                "marstat married"   ///
		                "a_maritl married"  ///
		                "prmarsta married"  ///
		                "marsuppw pwgt"     ///
		                "marsupwt pwgt"     ///
		                "a_age age"         ///
		                "peage age"         ///
		                "prfamtyp a_famtyp" {
			cap rename `pair'
		}

		// From the 1988 file onward the family-relationship variable is taken
		// from a_famrel (or prfamrel when that is the name present); any
		// pre-existing famrel is discarded first. The final rename is not
		// captured, so the run stops if neither source name exists.
		if `yof'>=1988 {
			cap rename prfamrel a_famrel
			cap drop famrel
			rename a_famrel famrel
		}

		// Constant year variable for this pass, set to the loop value y.
		qui gen year = `y'

		*****************************
		* CREATING SIMULATED TAX UNITS: family identifier and family weight
		*****************************
		* Inactive legacy lines, kept for reference only:
		*   replace year = year + 1899
		*   drop if year>=1995
		*   gen pppos = .
		*   replace pppos = perid
		*   rename famknd fkind
		*   rename marstat married

		* The family identifier used throughout is a copy of ffpos.
		gen sfid = ffpos

		/* Jeff's original version of the family-weight step, for reference:
		sort year hhseq sfid  
		by year hhseq sfid: egen famhead = min(pppos)  
		by year hhseq sfid: gen famsize = _N  
		gen temp = wgt if pppos==famhead  
		by year hhseq sfid: egen fwgt = max(temp)  
		drop temp
		*/

		* Family size: number of records sharing year, household and family id.
		bysort year hhseq sfid: gen famsize = _N

		* Family head: the record with the smallest pppos within the family.
		qui egen famhead = min(pppos), by(year hhseq sfid)

		* Family weight: the head's person weight copied to every family member.
		* A tempvar holds the head-only weight so no scratch name is left behind.
		tempvar headwgt
		qui gen `headwgt' = pwgt if pppos==famhead
		qui egen fwgt = max(`headwgt'), by(year hhseq sfid)
		drop `headwgt'


		// Assignments. Rules are applied in sequence, so a later rule overrides
		// an earlier one for any record that matches both.

		*NOTE: in internal program used TYPEBOX before 1986 rather than FKIND, so some code changed, but same results
		gen taxunit = .

		if `y'<=1986 {
			* Old layout, keyed on fkind. Person-level units are numbered
			* pppos + 70; family-level units reuse the family id sfid.

			* Unrelated secondary individuals (5) and nonfamily householders (4)
			* each form their own unit.
			qui replace taxunit = pppos + 70 if inlist(fkind,4,5)

			* Primary family members (1) and related/unrelated subfamily
			* members (2,3) take their family id.
			qui replace taxunit = sfid if inlist(fkind,1,2,3)

			* Adult children (age 20 or more) who are not the reference person
			* or spouse are split into their own unit.
			qui replace taxunit = pppos + 70 if age>=20 & inlist(relhead,4,5) & fkind==1
			qui replace taxunit = pppos + 70 if age>=20 & famrel>2

			* Ever-married children and other relatives aged 19 or less are
			* likewise split into their own unit.
			qui replace taxunit = pppos + 70 if age<=19 & married<8 & inlist(relhead,4,5) & fkind==1
			qui replace taxunit = pppos + 70 if age<=19 & married<8 & famrel>2
		}
		else {
			* New layout, keyed on a_famtyp. Person-level units are numbered
			* pppos; family-level units reuse the family id sfid.

			* Unrelated secondary individuals (5) and nonfamily householders (2)
			* each form their own unit.
			qui replace taxunit = pppos if inlist(a_famtyp,2,5)

			* Primary family members (1) and subfamily members (3,4) take their
			* family id.
			qui replace taxunit = sfid if inlist(a_famtyp,1,3,4)

			* Adult children / other relatives (age 20 or more) get their own unit.
			qui replace taxunit = pppos if age>=20 & famrel>2

			* Ever-married children / other relatives aged 19 or less get their
			* own unit. The year variable equals the loop value y, which is at
			* least 1987 in this branch, so no separate year test is needed.
			qui replace taxunit = pppos if age<=19 & married<7 & famrel>2
		}

		*Calculate age of oldest subfamily member
		sort year hhseq taxunit
		qui egen maxage = max(age), by(year hhseq taxunit)

		*Determine if anybody is ever-married in the subfamily
		* The coding cutoff is keyed on the loop year y, exactly as in the
		* assignment rules earlier in the loop (married<8 through 1986,
		* married<7 afterwards). The previous test on the year variable used
		* 1987 as the last old-coding year, which disagreed with those rules
		* and flagged every record in the 1987 pass as ever-married.
		* NOTE(review): confirm against the codebook that the marital-status
		* recode changes with the same file as the family-type recode.
		if `y'<=1986 {
			qui gen temp = married<=7
		}
		else {
			qui gen temp = married<=6
		}
		qui egen anymarried = max(temp), by(year hhseq taxunit)
		drop temp

		*Assign individuals in unmarried, under-19 headed subfamilies to the primary family in the household
		*In cases of no primary family, assign to family of oldest individual in household
		*Individuals under 15 who live alone are dropped since no income asked.  Those 15 and older are kept as own subfamily
		sort year hhseq pppos
		qui egen maxhhage = max(age), by(year hhseq)
		* Target unit id is 0 in the old layout and 1 in the new layout.
		* The branch on y alone decides which applies; the former extra tests
		* on the year variable left the 1987 pass matching neither rule.
		* NOTE(review): presumably 0 and 1 are the primary-family ids in the
		* respective layouts - confirm.
		if `y'<=1986 {
			qui replace taxunit = 0 if anymarried==0 & maxage<=19 & maxhhage>19
		}
		else {
			qui replace taxunit = 1 if anymarried==0 & maxage<=19 & maxhhage>19
		}
		* Recompute the oldest age per unit after the reassignment above.
		sort year hhseq taxunit
		qui egen maxage2 = max(age), by(year hhseq taxunit)

		* Flag exactly one oldest person per household: ties on age are broken
		* by keeping the smallest pppos among the tied records.
		qui gen oldest = 1 if age==maxhhage
		qui egen temp = sum(oldest), by(year hhseq)
		sort year hhseq oldest
		qui egen temp2 = min(pppos) , by(year hhseq oldest)
		qui replace oldest = 0 if temp>1 & pppos!=temp2
		* Broadcast the oldest person's unit id to the whole household.
		qui gen temp3 = 0
		qui replace temp3 = taxunit if oldest==1
		qui egen oldesttaxunit = max(temp3), by(year hhseq)
		* Units that are still all-young and never-married after the step above
		* are folded into the unit of the oldest household member.
		replace taxunit = oldesttaxunit if anymarried==0 & maxage2<=19 & maxhhage>19
		drop temp temp2 temp3

		* Size of each final tax unit (records per year, household and unit).
		bysort year hhseq taxunit: gen sfsize = _N

		// Currently married flag (what we will use to input filer status)
		// NOTE: we only count you married if your spouse is present because we can't pick up
		// 		 non-present spousal income and we don't want to pool 1 person's income over potentially 2
		//		 [this decision does not affect a large number of people]
		// The flag is 1 for marital codes 1 and 2 and 0 otherwise.
		cap drop married_now
		qui gen married_now = inlist(married,1,2)

		*Adjust for the fact that I am breaking up families as originally weighted - so divide the weights to the new
		*subfamilies based on the size of their respective families
		* Units that ended up larger than the source family keep the unscaled weight.
		qui replace fwgt = fwgt * sfsize/famsize if sfsize<=famsize
		* Inactive legacy lines, kept for reference only:
		*   qui gen income = pinc - trans - ss
		*   collapse (mean) fwgt, by(year hhseq taxunit)      [optionally with (sum) income]
		*   save tax_temp.dta, replace

		* Stamp the records with yof, then switch to the output variable names.
		qui gen yof = `yof'
		rename fwgt tu_wgt
		rename hhseq h_seq
		rename pppos ppos
		
		// Keep only the key: file-year and person identifiers, the tax-unit
		// weight and id, and the currently-married flag.
		keep yof h_seq ppos tu_wgt taxunit married_now
		// Every pass after the first (y = 1985) appends the key accumulated so
		// far before saving, so the first pass overwrites any existing key file
		// and later passes grow it. The 1985 here must match the loop start.
		if `y'>1985 append using CPS_taxsim_tukey
		save CPS_taxsim_tukey, replace
		di "`yof' file added"
	}
}



exit


/* Benchmark (not run: the exit above ends the do-file before this point)
- We want to get proportions of household structure right
	- Married (all are joint)
	- Single individual
	- Single individual with dependents
	- Dependent filers - never picked up
- We want our distribution of AGI to be about right
*/
use CPS_taxsim_tukey, clear
preserve
	// One row per tax unit: unit-level married flag, mean weight, file year
	// and the number of member records.
	qui egen tu_uniq = group(yof h_seq taxunit)
	gen N=1
	collapse (max) married_now (mean) tu_wgt yof (count) N, by(tu_uniq)

	// single: one-record unit; head: multi-record unit with nobody flagged married.
	qui gen single = (N==1)
	qui gen head = (single==0 & married_now==0)

	// Weighted shares of each unit type by file year.
	collapse (mean) married_now single head [aw=tu_wgt], by(yof)
	list
restore
